pacman::p_load(sf, sfdep, tmap, tidyverse, knitr, plotly, zoo, Kendall, mapview)Take-home-project
Geospatial Proj
Getting Started
Lets make make sure that sfdep, sf, tmap and tidyverse packages of R are currently installed
Importing OD data into R
Firstly we will import the Passenger volume by Origin Destination Bus Stops data set downloaded from LTA DataMall by using read_csv() of readr package.
odbus <- read_csv("data/aspatial/origin_destination_bus_202308.csv")
glimpse(odbus)Rows: 5,709,512
Columns: 7
$ YEAR_MONTH <chr> "2023-08", "2023-08", "2023-08", "2023-08", "2023-…
$ DAY_TYPE <chr> "WEEKDAY", "WEEKENDS/HOLIDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR <dbl> 16, 16, 14, 14, 17, 17, 17, 17, 7, 17, 14, 10, 10,…
$ PT_TYPE <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE <chr> "04168", "04168", "80119", "80119", "44069", "4406…
$ DESTINATION_PT_CODE <chr> "10051", "10051", "90079", "90079", "17229", "1722…
$ TOTAL_TRIPS <dbl> 7, 2, 3, 10, 5, 4, 3, 22, 3, 3, 7, 1, 3, 1, 3, 1, …
Origin & Destination Bus Stop Code
odbus$ORIGIN_PT_CODE <-
as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <-
as.factor(odbus$DESTINATION_PT_CODE)Extracting study data
We will filter out the data according to the requirements set out by Professor 1.) “Weekday @ 6-9am”
origtrip_6_9 <- odbus %>%
filter(DAY_TYPE == "WEEKDAY") %>%
filter(TIME_PER_HOUR >= 6 &
TIME_PER_HOUR <= 9) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(TRIPS = sum(TOTAL_TRIPS))kable(head(origtrip_6_9))| ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 1973 |
| 01013 | 952 |
| 01019 | 1789 |
| 01029 | 2561 |
| 01039 | 2938 |
| 01059 | 1651 |
write_rds(origtrip_6_9, "data/rds/origtrip_6_9.rds")2.) “Weekday @ 5-8pm”
origtrip_17_20 <- odbus %>%
filter(DAY_TYPE == "WEEKDAY") %>%
filter(TIME_PER_HOUR >= 17 &
TIME_PER_HOUR <= 20) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(TRIPS = sum(TOTAL_TRIPS))
glimpse(origtrip_17_20)Rows: 5,035
Columns: 2
$ ORIGIN_PT_CODE <fct> 01012, 01013, 01019, 01029, 01039, 01059, 01109, 01112,…
$ TRIPS <dbl> 8448, 7328, 3608, 9317, 12937, 2133, 322, 45010, 27233,…
kable(head(origtrip_17_20))| ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 8448 |
| 01013 | 7328 |
| 01019 | 3608 |
| 01029 | 9317 |
| 01039 | 12937 |
| 01059 | 2133 |
write_rds(origtrip_17_20, "data/rds/origtrip_17_20.rds")3.) “Weekend @ 11am-2pm”
origtrip_11_14 <- odbus %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
filter(TIME_PER_HOUR >= 11 &
TIME_PER_HOUR <= 14) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(TRIPS = sum(TOTAL_TRIPS))kable(head(origtrip_11_14))| ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 2273 |
| 01013 | 1697 |
| 01019 | 1511 |
| 01029 | 3272 |
| 01039 | 5424 |
| 01059 | 1062 |
write_rds(origtrip_11_14, "data/rds/origtrip_11_14.rds")4.) “Weekend @ 4-7pm”
origtrip_16_19 <- odbus %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
filter(TIME_PER_HOUR >= 16 &
TIME_PER_HOUR <= 19) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(TRIPS = sum(TOTAL_TRIPS))kable(head(origtrip_16_19))| ORIGIN_PT_CODE | TRIPS |
|---|---|
| 01012 | 3208 |
| 01013 | 2796 |
| 01019 | 1623 |
| 01029 | 4244 |
| 01039 | 7403 |
| 01059 | 1190 |
write_rds(origtrip_16_19, "data/rds/origtrip_16_19.rds")origtrip_11_14 <- read_rds("data/rds/origtrip_11_14.rds")
origtrip_16_19 <- read_rds("data/rds/origtrip_16_19.rds")
origtrip_17_20 <- read_rds("data/rds/origtrip_17_20.rds")
origtrip_6_9 <- read_rds("data/rds/origtrip_6_9.rds")Importing geospatial data
busstop <- st_read(dsn = "data/geospatial",
layer = "BusStop") %>%
st_transform(crs = 3414)Reading layer `BusStop' from data source
`/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
Glimpse the Bus Stop tibble data frame
glimpse(busstop)Rows: 5,161
Columns: 4
$ BUS_STOP_N <chr> "22069", "32071", "44331", "96081", "11561", "66191", "2338…
$ BUS_ROOF_N <chr> "B06", "B23", "B01", "B05", "B05", "B03", "B02A", "B02", "B…
$ LOC_DESC <chr> "OPP CEVA LOGISTICS", "AFT TRACK 13", "BLK 239", "GRACE IND…
$ geometry <POINT [m]> POINT (13576.31 32883.65), POINT (13228.59 44206.38),…
Load Map into MPSZ
mpsz <- st_read(dsn = "data/geospatial",
layer = "MPSZ-2019") %>%
st_transform(crs = 3414)Reading layer `MPSZ-2019' from data source
`/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
glimpse(mpsz)Rows: 332
Columns: 7
$ SUBZONE_N <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
Geospatial Data Wrangling
Combining Busstop and mpsz
busstop_mpsz <- st_intersection(busstop, mpsz) %>%
select(BUS_STOP_N, SUBZONE_C) %>%
st_drop_geometry()
glimpse(busstop_mpsz)Rows: 5,156
Columns: 2
$ BUS_STOP_N <chr> "13099", "13089", "06151", "13211", "13139", "13109", "1311…
$ SUBZONE_C <chr> "RVSZ05", "RVSZ05", "SRSZ01", "SRSZ01", "SRSZ01", "SRSZ01",…
write_rds(busstop_mpsz, "data/rds/busstop_mpsz.csv") origin_6_9 <- left_join(origtrip_6_9 , busstop_mpsz,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = SUBZONE_C) %>%
group_by(ORIGIN_SZ) %>%
summarise(TOT_TRIPS = sum(TRIPS))
glimpse(origin_6_9)Rows: 312
Columns: 2
$ ORIGIN_SZ <chr> "AMSZ01", "AMSZ02", "AMSZ03", "AMSZ04", "AMSZ05", "AMSZ06", …
$ TOT_TRIPS <dbl> 116600, 226521, 199437, 114549, 70770, 102390, 23231, 28011,…
origin_17_20 <- left_join(origtrip_17_20 , busstop_mpsz,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = SUBZONE_C) %>%
group_by(ORIGIN_SZ) %>%
summarise(TOT_TRIPS = sum(TRIPS))origin_11_14 <- left_join(origtrip_11_14 , busstop_mpsz,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = SUBZONE_C) %>%
group_by(ORIGIN_SZ) %>%
summarise(TOT_TRIPS = sum(TRIPS))origin_16_19 <- left_join(origtrip_16_19 , busstop_mpsz,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = SUBZONE_C) %>%
group_by(ORIGIN_SZ) %>%
summarise(TOT_TRIPS = sum(TRIPS))Setting up the Hexagon Grid
Drawing the Hexagon Grid
Drawing the hexagon grid over the mpsz map
area_honeycomb_grid = st_make_grid(mpsz, c(250, 250), what = "polygons", square = FALSE)To sf and add grid ID
honeycomb_grid_sf = st_sf(area_honeycomb_grid) %>%
mutate(grid_id = 1:length(lengths(area_honeycomb_grid)))Determine which bus stops is contained within which hexagon using st_within.
busstop_honeycomb <- st_intersection(honeycomb_grid_sf,busstop) %>%
select(BUS_STOP_N, grid_id) %>%
st_drop_geometry()write_rds(busstop_honeycomb, "data/rds/busstop_honeycomb.csv")Check for duplicate records
duplicate <- busstop_honeycomb %>%
group_by_all() %>%
filter(n()>1) %>%
ungroup()busstop_honeycomb <- unique(busstop_honeycomb)remove grid without value of 0 (i.e. no points in side that grid)
busstop_honeycomb <- busstop_honeycomb %>% filter(!is.na(grid_id) & grid_id > 0)
Assign every Bus Stop with a Grid ID
origin_6_9 <- left_join(busstop_honeycomb, origtrip_6_9,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
origin_6_9 <- origin_6_9 %>%
filter(!is.na(TRIPS) & TRIPS > 0)
origin_17_20 <- left_join(busstop_honeycomb, origtrip_17_20,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
origin_17_20 <- origin_17_20 %>%
filter(!is.na(TRIPS) & TRIPS > 0)
origin_11_14 <- left_join(busstop_honeycomb, origtrip_11_14,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
origin_11_14 <- origin_11_14 %>%
filter(!is.na(TRIPS) & TRIPS > 0)
origin_16_19 <- left_join(busstop_honeycomb, origtrip_16_19,
by = c("BUS_STOP_N" = "ORIGIN_PT_CODE"))
origin_16_19 <- origin_16_19 %>%
filter(!is.na(TRIPS) & TRIPS > 0)Choropleth Visualisation
Weekday Morning Peak 6am-9am
total_trips_by_grid <- origin_6_9 %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Reds",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycombWeekday Afternoon Peak 5pm - 8pm
total_trips_by_grid <- origin_17_20 %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Reds",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycombWeekday/Weekend Morning Peak 11am-2pm
total_trips_by_grid <- origin_11_14 %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Reds",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycombWeekend/Holiday Peak 4pm-7pm
total_trips_by_grid <- origin_16_19 %>%
group_by(grid_id) %>%
summarise(total_trips = sum(TRIPS, na.rm = TRUE))Merge geospatial data
total_trips_by_grid <- total_trips_by_grid %>%
left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))
total_trips_by_grid_sf <- st_sf(total_trips_by_grid)Plot the Choropleth map
tmap_mode("view")
map_honeycomb = tm_shape(total_trips_by_grid_sf) +
tm_fill(
col = "total_trips",
palette = "Reds",
style = "cont",
title = "Total Trips Taken - Weekday Morning Peak 6-9am",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of trips: " = "total_trips"
),
popup.format = list(
total_trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.4)
map_honeycomb